
###########################################################
##### Haiti elite network project  		          			#####
##### functions				 			                      		#####
##### 2021 mar 03                   									#####
###########################################################


library(foreign)
library(doBy)
library(xtable)
library(gtools)
library(data.table)
library(sna)
library(igraph)
library(stringr)
library(multiwayvcov)
library(stargazer)
library(lmtest)
library(sandwich)
library(car)
library(reshape2)
library(glmnet)
library(AER)
library(lubridate)
library(reshape)
library(ggplot2)
library(dplyr)
library(RColorBrewer)
library(tidyr)


# Session-wide settings: a large scipen penalty makes R prefer fixed over
# scientific notation when printing numbers, and strings are kept as
# character vectors instead of being converted to factors.
options(scipen = 999, stringsAsFactors = FALSE)

# basic functions

# Return the first element of `x` (x[1], so NA for an empty atomic vector).
# Extra arguments are accepted and ignored.
# NOTE(review): dplyr and data.table (both attached above) also export a
# function called `first`; this definition takes precedence once sourced.
first <- function(x, ...) {
  head_pos <- 1L
  x[head_pos]
}

# Number of elements in `x` (length(x); missing values are counted too).
# Extra arguments are accepted and ignored.
# NOTE(review): dplyr (attached above) also exports `count`; this definition
# takes precedence once sourced.
count <- function(x, ...) {
  n_elements <- length(x)
  n_elements
}

# Summary statistics for a numeric vector.
#
# Args:
#   x:   numeric vector.
#   ...: forwarded to mean() and var() (e.g. na.rm = TRUE).
#
# Returns a named numeric vector with
#   mean - mean(x, ...)
#   var  - population variance: the sample variance from var() rescaled
#          by (n - 1) / n, where n is the number of non-missing values
#   num  - length(x) (missing values included)
sumfun <- function(x, ...) {
  # The previous expression `length(x) - 1 / length(x)` evaluated as
  # n - (1 / n) because of operator precedence; the intended factor is
  # (n - 1) / n. Non-missing values are counted so the factor stays
  # correct when na.rm = TRUE is forwarded; without na.rm, var() is NA
  # whenever x has missing values, so the factor is irrelevant there.
  n_valid <- sum(!is.na(x))
  c(
    mean = mean(x, ...),
    var = ((n_valid - 1) / n_valid) * var(x, ...),
    num = length(x)
  )
}

# Sum of `x`, returned as a length-one vector named "sum".
# `...` is forwarded to sum() (e.g. na.rm = TRUE).
sumfun2 <- function(x, ...) {
  total <- sum(x, ...)
  c(sum = total)
}

# Collapse a data frame to its distinct rows and count how often each occurs.
#
# Args:
#   df: data frame.
#
# Returns the distinct rows of `df` (in order of first appearance, original
# row names kept) with an extra integer column `Freq` holding the number of
# times each row occurs in `df`.
count.reps <- function(df) {
  # One key per row; "\r" is used as a separator unlikely to occur in data.
  # Column names are dropped so that a column called `sep` or `collapse`
  # is not mistaken for an argument of paste().
  hash <- do.call("paste", c(unname(as.list(df)), sep = "\r"))
  is_first <- !duplicated(hash)
  keys <- hash[is_first]
  # Count per key in first-appearance order. table(hash) sorts its result
  # by key, which does not line up with the distinct rows unless `df`
  # happens to be sorted already.
  freq <- tabulate(match(hash, keys), nbins = length(keys))
  cbind(df[is_first, , drop = FALSE], Freq = freq)
}

# Z-score a numeric vector: subtract the mean and divide by the sample
# standard deviation, both computed with missing values removed.
#
# Args:
#   x: numeric vector.
#
# Returns a vector the same length as `x`; missing inputs stay missing.
standardize <- function(x) {
  center <- mean(x, na.rm = TRUE)
  # Named `spread` so the local value does not shadow the sd() function.
  spread <- sd(x, na.rm = TRUE)
  # Returned as a plain expression so the result is visible; the previous
  # trailing assignment returned it invisibly.
  (x - center) / spread
}

# Shift a numeric variable within groups.
#
# Args:
#   var:   numeric vector to shift.
#   index: grouping vector (or factor) of the same length as `var`.
#   lag:   single number passed to shift() as `shift_by`. With the shift()
#          defined in this file, a positive value moves later observations
#          forward (a lead) and a negative value moves earlier observations
#          back (a lag).
#
# Returns a vector of the same length, type and order as `var`. Positions
# with no value to shift in, and positions whose `index` is NA, are NA.
#
# NOTE(review): `shift` resolves to the helper defined in this file, which
# masks data.table::shift once the file is sourced; data.table's version
# uses the opposite sign convention by default.
lag_group <- function(var, index, lag) {
  # Start from an all-NA copy so type and length follow `var`.
  out <- var
  out[] <- NA
  # Work on row positions per group and write results back in place.
  # tapply() + unlist() returned values in group-sorted order, which is
  # misaligned with `var` whenever the input is not sorted by `index`.
  groups <- split(seq_along(var), index, drop = TRUE)
  for (rows in groups) {
    shifted <- shift(var[rows], lag)
    # Guard against shift() returning more values than the group holds
    # (can happen when |lag| exceeds the group size).
    out[rows] <- shifted[seq_along(rows)]
  }
  out
}

# Most frequent value in `v`. When several values tie, the one that appears
# first in `v` is returned (which.max() picks the first maximum).
get_mode <- function(v) {
  candidates <- unique(v)
  # Position of each element's value within `candidates`
  positions <- match(v, candidates)
  tallies <- tabulate(positions)
  winner <- which.max(tallies)
  candidates[winner]
}

## functions to clean company names

# Strip filler tokens (legal forms, generic business words, port/city
# names) from company names.
#
# Args:
#   x: character vector of company names. The patterns are upper-case and
#      matching is case-sensitive.
#
# Returns a character vector the same length as `x` with the filler tokens
# removed, " ET " / " AND " turned into " & ", and leading/trailing blanks
# trimmed.
#
# Rules are applied one after another in the order listed; the order
# matters because earlier replacements change what later patterns see.
fillers <- function(x) {
  # Pad with blanks so tokens at either end can be matched as " TOKEN ".
  x <- paste(" ", x, " ")
  # Punctuation becomes a blank; triple blanks are squeezed once.
  x <- gsub("\\.", " ", x)
  x <- gsub("\\,", " ", x)
  x <- gsub("   ", " ", x)

  # NOTE(review): "MR " and "QINGDAO" carry no leading blank, so they also
  # match inside longer words - confirm this is intended.
  patterns <- c(
    " SA ", " S A ", "MR ", " INC ", " LTD ", " LLC ", " CO ", " IND ",
    " INDUSTRIEL ", " CONSTRUCTION ", " DISTRIBUTION ", " ENT ",
    " ENTERPRISE ", " ENTREPRISES ", " ET ", " CORP ", " AND ",
    " IMP ", " EXP ", " EXPORTS ", " IMPORTS ", " IMP/EXP ", " IMP-EXP ",
    " EXPORT ", " IMPORT/ EXPORT", " IMPORT ", " SHPG ", " SHIPPING ",
    " ASSOC ", " SOC ", " ASSOCIATION ", " SOCIETE ", " LIMITED ",
    " TRDG ", " TRADING ", " INTL ", " INTER'L ", " INT'L ",
    " INTERNATIONAL ", " MIAMI ", " RIO HAINA ", " LIMA ", " IZMIR ",
    " VALPARAISO ", " VERACRUZ ", " JIANGMEN ", "QINGDAO", " QINGDAO ",
    " CALLAO ", " HALIFAX ", " SAO PAULO ", " NINGBO ", " SHANGHAI ",
    " MANZANILLO ", " ALTAMIRA ", " CHANCERELLES ", " SAN PEDRO SULA ",
    " SERVICES ", " N/A "
  )
  # Every token is replaced by a single blank. " INC " used to be replaced
  # by the empty string, which glued its neighbours together and hid any
  # following token (e.g. "X INC LTD" became "XLTD").
  replacements <- rep(" ", length(patterns))
  replacements[patterns %in% c(" ET ", " AND ")] <- " & "

  for (i in seq_along(patterns)) {
    x <- gsub(patterns[i], replacements[i], x)
  }

  # Trim the padding.
  x <- gsub("^ +", "", x)
  x <- gsub(" *$", "", x)
  return(x)
}

# Remove a fixed list of common given names from upper-case name strings.
#
# Args:
#   x: character vector of names (matching is case-sensitive).
#
# Returns a character vector the same length as `x` with each listed given
# name replaced by a blank and leading/trailing blanks trimmed.
#
# NOTE(review): this definition masks base::names() for any code in the
# global environment that calls names(obj) after this file is sourced
# (the replacement form `names(obj) <- value` is not affected).
names <- function(x) {
  given_names <- c(
    " JEAN ", " PIERRE ", " JOSEPH ", " MARIE ", " CHARLES ", " LOUIS ",
    " FELIX ", " YVES ", " FRANCOIS ", " JACQUES ", " LAURENT ",
    " AUGUSTIN "
  )
  # Pad with blanks so names at either end can be matched as " NAME ".
  cleaned <- paste(" ", x, " ")
  # Applied one after another, in the order listed.
  for (pattern in given_names) {
    cleaned <- gsub(pattern, " ", cleaned)
  }
  cleaned <- gsub("^ +", "", cleaned)
  cleaned <- gsub(" *$", "", cleaned)
  return(cleaned)
}

# Homogenise company names: drop legal-form tokens and port/city names and
# expand common abbreviations to a single spelling.
#
# Args:
#   x: character vector of company names. The patterns are upper-case and
#      matching is case-sensitive.
#
# Returns a character vector the same length as `x`, with runs of blanks
# collapsed to one and leading/trailing blanks trimmed.
#
# Rules are applied one after another in the order listed; the order
# matters (e.g. " AND " becomes " & " before " IMPORT & EXPORT " is
# matched). Repeated rules are kept on purpose: one gsub() pass cannot
# match two adjacent copies of a token because they share a blank.
homog <- function(x) {
  # Pad with blanks so tokens at either end can be matched as " TOKEN ".
  x <- paste(" ", x, " ")
  x <- gsub("\\.", " ", x)
  x <- gsub("\\,", " ", x)

  # pattern, replacement
  # Removed tokens are replaced by a single blank. INC, LTD, LLC, CO, CORP
  # and LIMITED used to be replaced by the empty string, which glued the
  # neighbouring words together (e.g. "ACME CO HAITI" became "ACMEHAITI").
  # " TRDG " now expands to " TRADING " with its trailing blank.
  # NOTE(review): "MR " and "QINGDAO" carry no leading blank, so they also
  # match inside longer words - confirm this is intended.
  rules <- matrix(c(
    " SA ", " ",
    " S A ", " ",
    "MR ", "",
    " INC ", " ",
    " LTD ", " ",
    " LLC ", " ",
    " CO ", " ",
    " IND ", " INDUSTRIEL ",
    " ENT ", " ENTREPRISES ",
    " ENT ", " ENTREPRISES ",
    " ETS ", " ENTREPRISES ",
    " ENTERPRISES ", " ENTREPRISES ",
    " ENTERPRISE ", " ENTREPRISES ",
    " ENTREPRISE ", " ENTREPRISES ",
    " ET ", " & ",
    " CORP ", " ",
    " AND ", " & ",
    " IMP ", " IMPORT ",
    " EXP ", " EXPORT ",
    " EXPORTS ", " EXPORT ",
    " IMPORTS ", " IMPORT ",
    " IMP/EXP ", " IMPORT EXPORT ",
    " IMP-EXP ", " IMPORT EXPORT ",
    " IMPORT/EXPORT ", " IMPORT EXPORT ",
    " IMPORT / EXPORT ", " IMPORT EXPORT ",
    " IMPORT & EXPORT ", " IMPORT EXPORT ",
    " IMPORT/ EXPORT", " IMPORT EXPORT ",
    " SHPG ", " SHIPPING ",
    " ASSOC ", " ASSOCIATION ",
    " SOC ", " SOCIETE ",
    " LIMITED ", " ",
    " TRDG ", " TRADING ",
    " INTL ", " INTERNATIONAL ",
    " INTER'L ", " INTERNATIONAL ",
    " INT'L ", " INTERNATIONAL ",
    " MIAMI ", " ",
    " IZMIR ", " ",
    " RIO HAINA ", " ",
    " JAWAHARLAL NEHR ", " ",
    " MIAMI ", " ",
    " RIO HAINA ", " ",
    " LIMA ", " ",
    " VALPARAISO ", " ",
    " VERACRUZ ", " ",
    " JIANGMEN ", " ",
    " QINGDAO ", " ",
    "QINGDAO", " ",
    " CALLAO ", " ",
    " HALIFAX ", " ",
    " SAO PAULO ", " ",
    " NINGBO ", " ",
    " SHANGHAI ", " ",
    " MANZANILLO ", " ",
    " ALTAMIRA ", " ",
    " HAITI ", " ",
    " N/A ", " "
  ), ncol = 2, byrow = TRUE)

  for (i in seq_len(nrow(rules))) {
    x <- gsub(rules[i, 1], rules[i, 2], x)
  }

  # Collapse any run of blanks to a single blank (a single pass over
  # double blanks only halves longer runs), then trim the padding.
  x <- gsub(" {2,}", " ", x)
  x <- gsub("^ +", "", x)
  x <- gsub(" *$", "", x)
  return(x)
}

# function to generate a time variable

# Build a per-group counter from a grouping vector `a`.
#
# Args:
#   a: grouping vector; positions are grouped by its distinct values.
#
# Returns (invisibly, as the value of the last assignment) the numeric
# vector `w` described below.
make.time <- function(a) {
  # For each distinct value of `a`, take the positions in `a` where it
  # occurs and build the vector 1..(group size), named by those positions.
  u <- tapply(1:length(a),a,function(x){
    y <- 1:length(x)
    names(y) <- x
    y}
  )
  # Within-group counters, concatenated group by group. Not used below.
  v <- unlist(u)
  # NOTE(review): `names` here is looked up at call time. This file defines
  # its own `names` (the given-name stripper), which masks base::names once
  # sourced. With base::names, `w` holds the original positions in `a`
  # (ordered group by group); with the file's `names`, it holds the counter
  # values themselves. Confirm which one is intended, and whether the
  # result was meant to be reordered to match `a`.
  w <- as.numeric(unlist(lapply(u,names)))
}

# function to reverse strings

# Reverse the characters of each string in `x`.
strrev <- function(x) {
  # Split every string into single characters ...
  char_lists <- strsplit(x, NULL)
  # ... then flip each character vector and glue it back together.
  sapply(char_lists, function(chars) paste(rev(chars), collapse = ""))
}

# function to create dummy if the only product for a given shipper is 
# a certain thing(s)

# Indicator for "every element of `x` equals `prod`".
#
# Args:
#   x:    vector of products (e.g. all products of one shipper).
#   prod: value to compare against.
#
# Returns 1 if all elements of `x` equal `prod`, 0 otherwise, and NA if
# any comparison is NA. An empty `x` gives 1.
only <- function(x, prod) {
  is_prod <- ifelse(x == prod, 1, 0)
  # Plain scalar comparison, returned visibly; the previous trailing
  # assignment returned the indicator invisibly.
  as.numeric(sum(is_prod) == length(is_prod))
}

# Indicator for "every element of `x` equals `prod1` or `prod2`".
#
# Args:
#   x:     vector of products (e.g. all products of one shipper).
#   prod1: first accepted value.
#   prod2: second accepted value.
#
# Returns 1 if all elements of `x` equal one of the two values, 0
# otherwise, and NA if any comparison is NA. An empty `x` gives 1.
only2 <- function(x, prod1, prod2) {
  is_either <- ifelse(x == prod1 | x == prod2, 1, 0)
  # Plain scalar comparison, returned visibly; the previous trailing
  # assignment returned the indicator invisibly.
  as.numeric(sum(is_either) == length(is_either))
}


# Extract the last name from full-name strings, keeping the particles
# DE / LE / LA attached to the word that follows them
# (e.g. "MARIE LE BLANC" -> "LE BLANC").
#
# Args:
#   name: character vector of upper-case names, words separated by blanks.
#
# Returns a character vector the same length as `name` holding the last
# blank-separated word (with any attached particles). Empty names give ""
# and missing names give NA.
#
# NOTE(review): a second, simpler definition of `last` appears further
# down in this file and replaces this one when the file is sourced from
# top to bottom.
last <- function(name) {
  padded <- paste0(" ", name, " ")
  # Glue each particle to the following word with "_" so the pair
  # survives the split on blanks. The lookbehind leaves the preceding
  # blank unconsumed, so consecutive particles ("DE LA") are both glued;
  # matching " DE " and " LA " separately missed the second one.
  glued <- gsub("(?<= )(DE|LE|LA) ", "\\1_", padded, perl = TRUE)
  glued <- gsub("^ +", "", glued)
  glued <- gsub(" *$", "", glued)
  # Drop everything up to and including the final blank.
  surname <- sub("^.* ", "", glued)
  # Turn the glue back into blanks.
  surname <- gsub("_", " ", surname)
  # paste0() turned missing names into the text "NA"; report them as NA.
  surname[is.na(name)] <- NA
  surname
}


## set regimes

# Regime lookup tables at monthly (reg), quarterly (regq) and annual (rega)
# resolution. Regimes are coded 'n1', 'd1', 'n2', 'd2', 'd3'; the meaning
# of the codes is not defined in this file.

# Monthly table: 1991-1993 have one row per year with month = NA;
# 1994-2013 have one row per month. The regime vector is entered year by
# year, as marked in the trailing comments.
reg <- data.frame('year'=sort(c(seq(1991,1993,1),rep(seq(1994,2013),12))),
                  'month'=c(rep(NA,3),rep(seq(1,12),20)),
                  'regime'=c('n1','n1','n1',                                  # 1991, 1992, 1993
                             rep("n1", 9), rep("d1", 3), rep("d1", 12),       # 1994, 1995
                             rep('d1',12),rep('d1',12),rep('d1',12),          # 1996, 1997, 1998
                             rep('d1',12),rep('d1',12),rep('d1',12),          # 1999, 2000, 2001
                             rep('d1',12),rep('d1',12),                       # 2002, 2003
                             rep('d1',2),rep('n2',10),                        # 2004
                             rep('n2',12),rep('n2',5), rep('d2',7),           # 2005, 2006           
                             rep('d2',12),rep('d2',12),                       # 2007, 2008
                             rep('d2',12),rep("d2",5), rep('d3',7),           # 2009, 2010
                             rep('d3',12),rep('d3',12), rep('d3',12)))        # 2011, 2012, 2013
# Quarter from month: 1-3 -> 1, 4-6 -> 2, 7-9 -> 3, 10-12 -> 4.
# Rows with month = NA get qtr = NA.
reg$qtr <- ifelse(reg$month>0 & reg$month<4, 1, 
                  ifelse(reg$month>3 & reg$month<7, 2,
                         ifelse(reg$month>6 & reg$month<10, 3,
                                ifelse(reg$month>9, 4, NA))))

# Quarterly table: distinct (year, qtr, regime) combinations of `reg`.
regq <- unique(cbind.data.frame("year" = reg$year, "qtr" = reg$qtr, "regime" = reg$regime))
# Three quarters contain months from two regimes; force each to a single
# regime so that, after the second unique(), there is one row per quarter.
# NOTE(review): 2004 Q1 and 2006 Q2 take the regime of their first two
# months, whereas 2010 Q2 takes the regime of its last month ('d3');
# confirm this is intended.
regq$regime[regq$year==2004 & regq$qtr==1] <- "d1"
regq$regime[regq$year==2006 & regq$qtr==2] <- "n2"
regq$regime[regq$year==2010 & regq$qtr==2] <- "d3"
regq <- unique(regq)

# The 1991-1993 rows have no month and hence no quarter; assign quarter 1.
# NOTE(review): assigning the string "1" converts the whole qtr column to
# character - confirm that downstream merges expect a character quarter.
regq$qtr[is.na(regq$qtr)==T] <- "1"

# Annual table: one regime per year, 1991-2013
# (1991-94 n1, 1995-2003 d1, 2004-05 n2, 2006-10 d2, 2011-13 d3).
rega <- data.frame('year'=sort(c(seq(1991,2013,1))),
                  'regime'=c('n1','n1','n1','n1',
                             'd1','d1','d1','d1','d1','d1','d1',
                             'd1','d1','n2','n2','d2',
                             'd2','d2','d2','d2','d3','d3', 'd3'))


# Frequency of each regime code in the three tables (results are not
# stored).
table(reg$regime)
table(regq$regime)
table(rega$regime)


## function to strip last name

# Extract the last blank-separated word from each name string.
#
# Args:
#   name: character vector of names, words separated by blanks.
#
# Returns a character vector the same length as `name`. A name with no
# blank is returned unchanged, a name ending in a blank gives "", an empty
# name gives "" and a missing name gives NA.
#
# NOTE(review): this replaces the particle-aware `last` defined earlier in
# this file, and also masks the `last` exported by dplyr and data.table.
last <- function(name) {
  # Drop everything up to and including the final blank. The earlier
  # reverse/split/reverse approach failed on empty strings and turned
  # missing names into the text "AN".
  sub("^.* ", "", name)
}


## reachability function

# Reachability matrix of a graph.
#
# Args:
#   g: igraph graph.
#   m: passed to subcomponent() as `mode` (e.g. "in", "out" or "all").
#
# Returns a vcount(g) x vcount(g) numeric matrix in which entry [i, j] is
# 1 if vertex j is in subcomponent(g, i, mode = m) and 0 otherwise. A
# graph without vertices gives a 0 x 0 matrix.
reachability <- function(g, m) {
  n_nodes <- vcount(g)
  reach_mat <- matrix(0, nrow = n_nodes, ncol = n_nodes)
  # seq_len() keeps the loop from running on an empty graph, where
  # 1:vcount(g) would iterate over c(1, 0) and fail.
  for (i in seq_len(n_nodes)) {
    reachable <- subcomponent(g, i, mode = m)
    # Mark every reachable vertex in one step.
    reach_mat[i, as.integer(reachable)] <- 1
  }
  return(reach_mat)
}


# ## make function for clustered and robust ses
# 
# source("02_Code/clusterfxn.R")
# 
# robust <- function(model) {
#   X <- model.matrix(model)
#   # number of obs
#   n <- dim(X)[1]
#   # n of predictors
#   k <- dim(X)[2]
#   # calculate stan errs as in the above
#   u <- matrix(resid(model))
#   # meat part Sigma is a diagonal with u^2 as elements
#   meat1 <- t(X) %*% diag(diag(crossprod(t(u)))) %*% X
#   # degrees of freedom adjust
#   dfc <- n/(n-k)
#   # like before
#   se <- sqrt(dfc*diag(solve(crossprod(X)) %*% meat1 %*% solve(crossprod(X))))
#   return(se)
# }


## make lag function

# Shift a numeric vector forwards or backwards, padding with NA.
#
# Args:
#   x:        numeric vector.
#   shift_by: number of positions to shift.
#               > 0: drop the first values and pad the end with NA (a lead)
#               < 0: pad the start with NA and drop the last values (a lag)
#                 0: return `x` unchanged
#             If several values are given, each is applied in turn and the
#             results are combined with sapply() (one column per value).
#
# Returns a vector of the same length as `x`.
#
# NOTE(review): this masks data.table::shift once the file is sourced;
# data.table's version uses the opposite sign convention by default.
shift <- function(x, shift_by) {
  stopifnot(is.numeric(shift_by))
  stopifnot(is.numeric(x))

  if (length(shift_by) > 1) {
    return(sapply(shift_by, shift, x = x))
  }

  # Never pad with more NAs than `x` has elements; otherwise the result
  # would be longer than the input when |shift_by| > length(x).
  n_pad <- min(abs(shift_by), length(x))

  if (shift_by > 0) {
    out <- c(tail(x, -n_pad), rep(NA, n_pad))
  } else if (shift_by < 0) {
    out <- c(rep(NA, n_pad), head(x, -n_pad))
  } else {
    out <- x
  }
  out
}